{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "fastai-sentiment-ulmfit.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "U7wT7Te0Jxzr",
        "colab_type": "text"
      },
      "source": [
        "# Exporting ULMFiT with fastai\n",
        "WARNING: you are on the master branch, please refer to the examples on the branch that matches your `cortex version`\n",
        "\n",
        "This file includes code which was modified from https://docs.fast.ai/text.html\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "pPj45JaXsmfT",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from fastai.text import *\n",
        "import pandas as pd"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_BB4WshHJuZ3",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Fetch the IMDB sample dataset and show the path returned by untar_data\n",
        "path = untar_data(URLs.IMDB_SAMPLE)\n",
        "path"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "CJ4B9JVgJvUC",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Peek at the raw reviews. display() is required here: a bare expression\n",
        "# in the middle of a cell produces no output (only the last one is shown).\n",
        "df = pd.read_csv(path/'texts.csv')\n",
        "display(df.head())\n",
        "\n",
        "# Language model data\n",
        "data_lm = TextLMDataBunch.from_csv(path, 'texts.csv')\n",
        "\n",
        "# Classifier model data (built with the language model's vocab)\n",
        "data_clas = TextClasDataBunch.from_csv(path, 'texts.csv', vocab=data_lm.train_ds.vocab, bs=32)\n",
        "\n",
        "# Save both data bunches, then reload them from the saved files\n",
        "data_lm.save('data_lm_export.pkl')\n",
        "data_clas.save('data_clas_export.pkl')\n",
        "data_lm = load_data(path, 'data_lm_export.pkl')\n",
        "data_clas = load_data(path, 'data_clas_export.pkl', bs=16)\n",
        "\n",
        "# Fine-tune the language model\n",
        "learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.5)\n",
        "learn.fit_one_cycle(1, 1e-2)\n",
        "learn.unfreeze()\n",
        "learn.fit_one_cycle(1, 1e-3)\n",
        "\n",
        "# print() so the generated text is actually shown (mid-cell expression)\n",
        "print(learn.predict(\"This is a review about\", n_words=10))\n",
        "\n",
        "# Keep the fine-tuned encoder so the classifier can load it\n",
        "learn.save_encoder('ft_enc')\n",
        "\n",
        "# Build the classifier on top of the fine-tuned encoder.\n",
        "# `learn` now refers to the classifier; the export cell relies on this name.\n",
        "learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5)\n",
        "learn.load_encoder('ft_enc')\n",
        "data_clas.show_batch()\n",
        "\n",
        "# Train in three stages, changing which layers are frozen and the\n",
        "# learning-rate range between stages\n",
        "learn.fit_one_cycle(1, 1e-2)\n",
        "learn.freeze_to(-2)\n",
        "learn.fit_one_cycle(1, slice(5e-3/2., 5e-3))\n",
        "learn.unfreeze()\n",
        "learn.fit_one_cycle(1, slice(2e-3/100, 2e-3))\n",
        "\n",
        "# Last expression of the cell, so the prediction is displayed\n",
        "learn.predict(\"This was a great movie!\")\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PhaDZglceCro",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Mount Google Drive inside the Colab runtime\n",
        "from google.colab import drive\n",
        "\n",
        "mount_point = '/content/drive'\n",
        "drive.mount(mount_point)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "SftzK0UadjZB",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Write the trained learner to a pickle file under the mounted Drive folder\n",
        "export_path = '/content/drive/My Drive/export.pkl'\n",
        "learn.export(export_path)"
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}
